# Remove every (non-hidden) object from the current workspace so the script
# starts from a clean slate. Loaded packages and options are not affected.
rm(list=ls()) #clear

library(summarytools)
library(dplyr)
library(psych)

# Rescale a numeric vector to the 0-1 range (min-max scaling).
#
# x: numeric vector. Missing values are ignored when the minimum and the range
#    are computed and stay NA in the result.
# Returns a vector of the same length as x in which the smallest observed
# value maps to 0 and the largest to 1. When all non-missing values are equal
# the range is 0, so the result is NaN (0 / 0).
std01<-function(x){
  # TRUE is spelled out: T is an ordinary variable that can be reassigned.
  min.x<-min(x, na.rm=TRUE)
  # After shifting by the minimum, the maximum equals the observed range.
  max.x<-max(x-min.x, na.rm=TRUE)
  return((x-min.x)/max.x)
}

#importing data from Qualtrics and demographic data from Prolific
Qual <- read.csv("Study 2 - Prolific/Qual_S2.csv")
Demo_REP <- read.csv("Study 2 - Prolific/Demo_Rep_S2.csv")
Demo_DEM <- read.csv("Study 2 - Prolific/Demo_Dem_S2.csv")

#merge the data using the participant's Prolific ID
# all = TRUE (a logical, not the string "TRUE") requests a full outer join, so
# rows without a match in the other table are kept and padded with NA.
# Columns that exist in both inputs of the second merge get the default
# ".x" / ".y" suffixes (e.g. Sex.x, Sex.y), which the demographic recodes
# further down rely on.
datraw <- merge(Qual, Demo_REP, by = "participant_id", all = TRUE)
dat <- merge(datraw, Demo_DEM, by = "participant_id", all = TRUE)

#removing disqualified
# Keep only rows with Finished == 1 and Consent.Form == 1. which() discards
# rows where either flag is NA, so those are dropped as well.
dat <- dat[which(dat$Finished == 1 & dat$Consent.Form == 1), , drop = FALSE]

#cleaning
#party ID
# pid01 is 0 when pid7 < 4 and 1 otherwise (NA when pid7 is missing).
# NOTE(review): pid7 == 4 is coded 1 here, whereas the party-extremity recode
# later in this script sets pid7 == 4 to NA -- confirm that no respondent has
# pid7 == 4, or that grouping them with code 1 is intended.
dat$pid01<- ifelse(dat$pid7<4, 0, 1)
# Levels are given explicitly so the labels stay attached to the right codes
# (and the call does not fail) even if only one of the two codes is present.
dat$pid01f<-factor(dat$pid01, levels = c(0, 1), labels = c("Democrat", "Republican"))

# Every item exists as an "r"-prefixed and a "d"-prefixed column (presumably
# the Republican and Democrat versions of the questionnaire -- confirm against
# the survey export). For each item, take the "r" value when it is present and
# fall back to the "d" value otherwise.
# Items: o1-o8, a1-a6, m1-m6, id1-id4, pv1-pv4 (created in this order).
for (item in c(paste0("o", 1:8), paste0("a", 1:6), paste0("m", 1:6),
               paste0("id", 1:4), paste0("pv", 1:4))) {
  dat[[item]] <- coalesce(dat[[paste0("r", item)]], dat[[paste0("d", item)]])
}
rm(item) # do not leave the loop variable behind in the workspace

#reverse coded items
# mutate(across(...)) with plain functions replaces mutate_at() + funs():
# funs() is deprecated in dplyr and mutate_at() is superseded by across().
# Each call overwrites the listed columns in place.

# a3, a4, a6: flip the 1-7 codes
dat<- dat %>%
  mutate(across(all_of(c("a3", "a4", "a6")),
                function(x) dplyr::recode(x, "1"=7, "2"=6, "3"=5, "4"=4, "5"=3, "6"=2, "7"=1))) #ps items
# id1, id2, id4: flip the 1-4 codes
dat<- dat %>%
  mutate(across(all_of(c("id1", "id2", "id4")),
                function(x) dplyr::recode(x, "1"=4, "2"=3, "3"=2, "4"=1)))
# id3: flip the 1-5 codes
dat<- dat %>%
  mutate(across(all_of(c("id3")),
                function(x) dplyr::recode(x, "1"=5, "2"=4, "3"=3, "4"=2, "5"=1))) #huddy items
# rotg items are stored with codes 9-15; map them onto 1-7
dat<- dat %>%
  mutate(across(all_of(c("rotg1", "rotg2", "rotg3", "rotg4", "rotg5", "rotg6",
                         "rotg7", "rotg9", "rotg10", "rotg11", "rotg12")),
                function(x) dplyr::recode(x, "9"=1, "10"=2, "11"=3, "12"=4, "13"=5, "14"=6, "15"=7)))
# rotg8: same 9-15 source codes, mapped in reverse order (9 -> 7, ..., 15 -> 1)
dat<- dat %>%
  mutate(across(all_of(c("rotg8")),
                function(x) dplyr::recode(x, "9"=7, "10"=6, "11"=5, "12"=4, "13"=3, "14"=2, "15"=1))) #fixing and reverse coding

# aps and sub-scales
# Row-wise means over the item columns. No na.rm is used, so a respondent
# missing any item of a scale gets NA for that scale. unname() drops the row
# names that rowMeans() would otherwise attach when given a data frame.
dat$otot20 <- unname(rowMeans(dat[paste0("o", 1:8)]))
dat$atot20 <- unname(rowMeans(dat[paste0("a", 1:6)]))
dat$mtot20 <- unname(rowMeans(dat[paste0("m", 1:6)]))
dat$apstot20 <- unname(rowMeans(
  dat[c(paste0("o", 1:8), paste0("a", 1:6), paste0("m", 1:6))]
))

# short scales (three items per sub-scale, nine in total)
dat$otot9 <- unname(rowMeans(dat[c("o1", "o6", "o8")]))
dat$atot9 <- unname(rowMeans(dat[c("a1", "a2", "a4")]))
dat$mtot9 <- unname(rowMeans(dat[c("m2", "m4", "m6")]))
dat$apstot9 <- unname(rowMeans(
  dat[c("o1", "o6", "o8", "a1", "a2", "a4", "m2", "m4", "m6")]
))

# descriptive statistics for the short scales
descr(dat$apstot9)
descr(dat$otot9)
descr(dat$atot9)
descr(dat$mtot9)

#feel therm party
# pid01 == 0 selects the Democrat thermometer as in-party and the Republican
# one as out-party; any other pid01 value swaps them.
dat$ftinparty <- with(dat, ifelse(pid01 == 0, ftdem, ftrep))
dat$ftoutparty <- with(dat, ifelse(pid01 == 0, ftrep, ftdem))
# signed difference: in-party minus out-party
dat$wbias <- with(dat, ftinparty - ftoutparty)

#feel therm ideology
# Same in/out assignment, using the liberal and conservative thermometers.
dat$ftinideo <- with(dat, ifelse(pid01 == 0, ftlib, ftcon))
dat$ftoutideo <- with(dat, ifelse(pid01 == 0, ftcon, ftlib))
# NOTE(review): unlike wbias, this is the absolute conservative-liberal gap,
# not ftinideo - ftoutideo -- confirm this is intended.
dat$ideowbias <- with(dat, abs(ftcon - ftlib))

#ideology
# ideo7 shifted and divided so that 1 maps to 0 and 7 maps to 1
dat$rideo7 <- with(dat, (ideo7 - 1) / 6)
#ideological extremity, 0-3: distance of ideo7 from the scale midpoint (4)
dat$idex <- with(dat, abs(ideo7 - 4))
#ideological extremity, 0-1: min-max rescaled version of idex
dat$ridex <- with(dat, std01(idex))
#ideological extremity, 0-3, factor variable.
dat$fidex <- with(dat, as.factor(idex))

# party ID extremity (RELABELED)
# 0, 1, 2 coding via a lookup table indexed by pid7:
#   pid7 1 or 7 -> 2, pid7 2 or 6 -> 1, pid7 3 or 5 -> 0.
# pid7 == 4, missing pid7 and any value outside 1-7 all give NA.
dat$pidex <- c(2, 1, 0, NA, 0, 1, 2)[match(dat$pid7, 1:7)]
# 0, 0.5, 1 coding:
dat$rpidex <- with(dat, std01(pidex))
# make 0, 1, 2 coding a factor variable
dat$fpidex <- with(dat, as.factor(pidex))

## trait ratings (higher means greater distance between tr of inparty and outparty)

# in-party evals: dtr_k when pid01 == 0, rtr_k otherwise (k = 1..8)
for (k in 1:8) {
  dat[[paste0("trip", k)]] <- ifelse(dat$pid01 == 0,
                                     dat[[paste0("dtr_", k)]],
                                     dat[[paste0("rtr_", k)]])
}
# out-party evals: the mirror image of the in-party assignment
for (k in 1:8) {
  dat[[paste0("trop", k)]] <- ifelse(dat$pid01 == 0,
                                     dat[[paste0("rtr_", k)]],
                                     dat[[paste0("dtr_", k)]])
}
# inparty-outparty - given diff codings above for Ds, Rs no abs value needed.
for (k in 1:8) {
  dat[[paste0("tr", k)]] <- dat[[paste0("trip", k)]] - dat[[paste0("trop", k)]]
}
rm(k) # do not leave the loop variable behind in the workspace

# reliability of the out-party, in-party and difference items
# (trailing numbers are the alpha values recorded by the original author)
psych::alpha(with(dat, cbind(trop1, trop2, trop3, trop4, trop5, trop6, trop7, trop8))) #.93
psych::alpha(with(dat, cbind(trip1, trip2, trip3, trip4, trip5, trip6, trip7, trip8))) #.92
psych::alpha(with(dat, cbind(tr1, tr2, tr3, tr4, tr5, tr6, tr7, tr8))) #.91
# scale scores: row means over the eight items (NA if any item is missing)
dat$trtot <- unname(rowMeans(dat[paste0("tr", 1:8)]))
dat$triptot <- unname(rowMeans(dat[paste0("trip", 1:8)]))
dat$troptot <- unname(rowMeans(dat[paste0("trop", 1:8)]))

#huddy items
# Reliability check, then the scale score as the row mean of the four items.
# Row means use no na.rm, so any missing item yields NA for that respondent.
psych::alpha(with(dat, cbind(id1, id2, id3, id4))) #.85
dat$idtot <- unname(rowMeans(dat[paste0("id", 1:4)]))

#partisan violence items
psych::alpha(with(dat, cbind(pv1, pv2, pv3, pv4))) #.81
dat$pvtot <- unname(rowMeans(dat[paste0("pv", 1:4)]))

#rules of the game items
psych::alpha(with(dat, cbind(rotg1, rotg2, rotg3, rotg4, rotg5, rotg6,
                                rotg7, rotg8, rotg9, rotg10, rotg11, rotg12))) #.85
dat$rotgtot <- unname(rowMeans(dat[paste0("rotg", 1:12)]))

#recoding demographic vars
# Each demographic field exists as a ".x" and a ".y" column after the merges.
# For every field the ".x" value is used when present, otherwise the ".y" one.

# sex; rows whose value is "CONSENT REVOKED" are removed, and so are rows
# where sex is NA (which() drops them, as subset() would)
dat$sex <- ifelse(is.na(dat$Sex.x), dat$Sex.y, dat$Sex.x)
dat <- dat[which(dat$sex != "CONSENT REVOKED"), , drop = FALSE]
dat$male <- as.numeric(dat$sex == "Male")

dat$age <- ifelse(is.na(dat$age.x), dat$age.y, dat$age.x)

# simplified ethnicity -> white indicator (NA stays NA)
dat$raceshort <- ifelse(is.na(dat$Ethnicity..Simplified..x),
                        dat$Ethnicity..Simplified..y,
                        dat$Ethnicity..Simplified..x)
dat$white <- as.numeric(dat$raceshort == "White")

# detailed ethnicity -> Hispanic/Latino indicator (NA stays NA)
dat$racelong <- ifelse(is.na(dat$Ethnicity.x), dat$Ethnicity.y, dat$Ethnicity.x)
dat$hisplat <- as.numeric(dat$racelong == "Latino/Hispanic" |
                            dat$racelong == "White Mexican")

# education -> bachelor's-or-higher indicator via a lookup table;
# labels not in the table (and NA) give NA
dat$education <- ifelse(is.na(dat$Highest.education.level.completed.x),
                        dat$Highest.education.level.completed.y,
                        dat$Highest.education.level.completed.x)
dat$ba <- c(1, 1, 1, 0, 0, 0, 0, 0)[match(
  dat$education,
  c("Undergraduate degree (BA/BSc/other)",
    "Graduate degree (MA/MSc/MPhil/other)",
    "Doctorate degree (PhD/other)",
    "No formal qualifications",
    "Don't know / not applicable",
    "High school diploma/A-levels",
    "Secondary education (e.g. GED/GCSE)",
    "Technical/community college")
)]

# household income bracket -> ordinal code 1-13 (position in the list below);
# labels not in the list (and NA) give NA
dat$income <- ifelse(is.na(dat$Household.Income..USD...US.participants.only..x),
                     dat$Household.Income..USD...US.participants.only..y,
                     dat$Household.Income..USD...US.participants.only..x)
dat$inc <- as.numeric(match(
  dat$income,
  c("Less than $10000",
    "$10000–$15999",
    "$16000–$19999",
    "$20000–$29999",
    "$30000–$39999",
    "$40000–$49999",
    "$50000–$59999",
    "$60000–$69999",
    "$70000–$79999",
    "$80000–$89999",
    "$90000–$99999",
    "$100000–$149999",
    "More than $150000")
))

# Sample descriptives (computed before the 0-1 rescaling below).
summary(dat$age)
# na.rm = TRUE so a single missing age does not turn the SD into NA;
# summary() above already tolerates (and reports) missing values.
sd(dat$age, na.rm = TRUE)

# frequency tables for the demographic variables and party ID
table(dat$raceshort)
table(dat$sex)
table(dat$ba)
table(dat$white)
table(dat$hisplat)
table(dat$inc)
freq(dat$pid01)

#recode 0 to 1
# Min-max rescale each listed column in place with std01(), so that its
# observed minimum becomes 0 and its observed maximum becomes 1.
# mutate(across(...)) replaces the deprecated mutate_at() + funs() form, and
# "trtot" is listed once (it appeared twice in the previous version).
dat<- dat %>%
  mutate(across(all_of(c("otot20", "atot20", "mtot20", "apstot20",
                         "otot9", "atot9", "mtot9", "apstot9",
                         "idtot", "ftinparty", "ftoutparty", "wbias",
                         "trtot", "triptot", "troptot",
                         "ftinideo", "ftoutideo", "ideowbias",
                         "pvtot", "rotgtot", "age", "inc")),
                std01))

# Keep only the analysis variables, in this column order.
dat <- dat[c(
  # individual items: o1-o8, a1-a6, m1-m6
  paste0("o", 1:8), paste0("a", 1:6), paste0("m", 1:6),
  # full and short scale scores
  paste0(c("o", "a", "m", "aps"), "tot20"),
  paste0(c("o", "a", "m", "aps"), "tot9"),
  # party ID and ideology
  "pid7", "pid01", "idtot", "rpidex", "fpidex", "rideo7", "ridex", "fidex",
  # feeling thermometers
  "ftinparty", "ftoutparty", "wbias", "ftinideo", "ftoutideo", "ideowbias",
  # trait ratings
  "trtot", "triptot", "troptot",
  # partisan violence and rules of the game
  "pvtot", "rotgtot",
  # demographics
  "male", "age", "white", "hisplat", "ba", "inc"
)]

# Write the cleaned data next to the raw study files (object name: dat) ...
save(list = "dat", file = "Study 2 - Prolific/dataS2.RData")

# ... and a copy under the name dat2 for the main analyses.
dat2 <- dat
save(list = "dat2", file = "Main Analyses/dataS2.RData")
